function [T_episode]=simulation_exp(alpha,beta,T,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size,N)
%SIMULATION_EXP Run one Q-learning pricing experiment for T episodes.
%
%   T_episode = simulation_exp(alpha,beta,T,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size,N)
%
%   Inputs:
%     alpha       - weight put on the realized profit when updating a Q-value:
%                   Q <- alpha*profit + (1-alpha)*Q.
%     beta        - decay rate of experimentation; in episode t each AMM
%                   experiments with probability exp(-beta*t).
%     T           - number of episodes.
%     vh, vl      - the two possible asset values, each drawn with probability 0.5.
%     mu_l,
%     sigma_l     - mean and standard deviation of the normal shock l that is
%                   added to the asset value to obtain the investor valuation.
%     grid_middle - central price of the price grid.
%     tick        - distance between two consecutive prices of the grid.
%     grid_size   - number of ticks on each side of grid_middle, so the grid
%                   holds 2*grid_size+1 prices.
%     N           - number of AMMs.
%
%   Output:
%     T_episode   - 1 x 4 vector with the best ask a_t (lowest quoted price)
%                   at t in {500000, 700000, 900000, 1000000} (in columns).
%                   Entries whose episode index exceeds T are NaN.
%
%   Internally the function fills all_episodes, a T x (3*N+1) matrix holding,
%   for every episode, A_n (quoted price of AMM n), opt_n (greedy price of
%   AMM n), Profit_n (profit of AMM n) and a_min (lowest quoted price).
%   For N=2 the columns are ordered as follows:
%     1: A_1
%     2: A_2
%     3: opt_1
%     4: opt_2
%     5: Profit_1
%     6: Profit_2
%     7: a_min
%
%   Uses randsample, normrnd and binornd.

%Pre-allocate exactly the 3*N+1 columns that are used below.
all_episodes = zeros(T,3*N+1);

%Price grid: index s in 1..n_prices maps to price_offset + s*tick, i.e. from
%grid_middle - grid_size*tick up to grid_middle + grid_size*tick. The same
%expression is used for every quote so that equal indices give bit-identical
%prices (prices are compared with == below).
n_prices     = 2*grid_size+1;
price_offset = grid_middle - grid_size*tick - tick;
price_grid   = price_offset + (1:n_prices)'*tick;

%Initialize the Q-matrix with random values uniform on [3,6] (per the original
%author's note: higher than the monopoly expected profit). One row per
%price, one column per AMM.
Q_n=3+(6-3)*rand(n_prices,N);

%Asset value in each episode: vl or vh with probability 0.5 each.
%(:) forces a T x 1 column so that the sum with l below is element-wise.
v_tilde = randsample([vl, vh], T, true);
v_tilde = v_tilde(:);
l  = normrnd(mu_l,sigma_l,T,1);                       %random draws of l in each episode
vc = v_tilde + l;                                     %investor valuation in each episode

%Experimentation probability in each episode (T x 1).
epsilon = exp(-beta*(1:1:T)');

%T x N matrix with 1 in (t,n) if AMM n experiments at time t. All AMMs share
%the same probability epsilon(t) but draw independently.
Experiment = binornd(1,repmat(epsilon,1,N));

%Loop over all episodes.
for t = 1:T
    %Index of the price (from 1 to n_prices) chosen by each AMM.
    s = zeros(1,N);
    for i=1:N
        %Indices of all Q-values attaining the maximum. Ties are irrelevant
        %when the Q-matrix is initialized with continuous random variables,
        %but matter otherwise.
        maxvector = find(Q_n(:,i) == max(Q_n(:,i)));
        s(1,i)  = maxvector(randi([1 length(maxvector)],1,1)); %Randomize among multiple greedy prices
        all_episodes(t,N+i) = price_grid(s(1,i));              %Record the greedy price of AMM i

        %If Experiment=1 AMM i explores: replace the greedy index by a
        %uniformly drawn one. Otherwise the greedy index is kept.
        if Experiment(t,i) == 1
            s(1,i)  = randi([1 n_prices],1,1);
        end
        all_episodes(t,i) = price_grid(s(1,i));                %Record the price quoted by AMM i
    end

    %Profit in episode t. The investor buys if vc >= a_min, in which case the
    %profit is (a_min - v_tilde), split equally between the AMMs quoting
    %a_min. Otherwise profits keep the pre-allocated value of zero.
    a_min = min(all_episodes(t,1:N));
    all_episodes(t,3*N+1) = a_min;
    if  a_min <= vc(t)
        index = find(a_min == all_episodes(t,1:N));   %all AMMs that set a_min
        m = size(index,2);                            %number of AMMs that set a_min
        for i=index
            all_episodes(t,2*N+i) = (a_min - v_tilde(t)) / m;
        end
    end

    %Update, for each AMM, the Q-value of the price actually played.
    for j=1:N
        Q_n(s(1,j),j) = alpha*all_episodes(t,2*N+j) + (1-alpha)*Q_n(s(1,j),j);
    end

end %ends the loop on episodes t

%Diagnostic (not returned): number of trailing episodes in which all AMMs
%quoted the same prices as in the last episode. The C_t < T guard stops the
%scan at the first episode instead of indexing row 0.
C_t=1;
while C_t < T && isequal(all_episodes(end,1:N), all_episodes(end-C_t,1:N))
    C_t = C_t+1;
end

%Record the best ask at the reporting episodes. Episodes that were not
%simulated (index > T) are reported as NaN.
checkpoints = [500000, 700000, 900000, 1000000];
T_episode   = NaN(1,numel(checkpoints));
reached     = checkpoints <= T;
T_episode(reached) = all_episodes(checkpoints(reached),3*N+1).';

